library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the ODI career bowling statistics from the local CSV export.
odiBowling <- read.csv(
  file = "D:\\Vishal\\III year\\Data Analytics\\Assignment II\\Player Statistics\\odicareerbowling.csv"
)
# Restrict the working data frame to the first ten columns of the file.
df <- odiBowling[seq_len(10)]
# Show the first six rows as a quick sanity check of the import.
head(df, n = 6L)
## Name Balls Maidens Runs Wickets Average
## 1 Aaron James Finch 73 0 59 2 29.50
## 2 Aavishkar Madhav Salvi 172 3 120 4 30.00
## 3 Abhimanyu Mithun 180 1 203 3 67.66
## 4 Abhishek Mohan Nayar 18 0 17 0 NA
## 5 Abraham Benjamin de Villiers 180 0 180 7 25.71
## 6 Adam Charles Voges 301 1 276 6 46.00
## X5_Wicket_Hauls X10_Wicket_Hauls Strike_Rate Economy
## 1 0 0 36.50 4.84
## 2 0 0 43.00 4.18
## 3 0 0 60.00 6.76
## 4 NA NA NA 5.66
## 5 0 0 25.71 6.00
## 6 0 0 50.16 5.50
# Drop every row that has a missing value in any of the kept columns
# (for example a player without wickets, whose Average is NA).
df <- df %>% na.omit()
# Preview the cleaned data; the original row numbers are retained.
df %>% head()
## Name Balls Maidens Runs Wickets Average
## 1 Aaron James Finch 73 0 59 2 29.50
## 2 Aavishkar Madhav Salvi 172 3 120 4 30.00
## 3 Abhimanyu Mithun 180 1 203 3 67.66
## 5 Abraham Benjamin de Villiers 180 0 180 7 25.71
## 6 Adam Charles Voges 301 1 276 6 46.00
## 7 Adam Fraser Milne 1463 5 1259 31 40.61
## X5_Wicket_Hauls X10_Wicket_Hauls Strike_Rate Economy
## 1 0 0 36.50 4.84
## 2 0 0 43.00 4.18
## 3 0 0 60.00 6.76
## 5 0 0 25.71 6.00
## 6 0 0 50.16 5.50
## 7 0 0 47.19 5.16
# Per-column descriptive statistics of the cleaned bowling data.
df %>% summary()
## Name Balls Maidens
## Aaron James Finch : 1 Min. : 9.0 Min. : 0.00
## Aavishkar Madhav Salvi : 1 1st Qu.: 339.5 1st Qu.: 1.00
## Abhimanyu Mithun : 1 Median : 1536.0 Median : 10.00
## Abraham Benjamin de Villiers: 1 Mean : 2956.4 Mean : 26.45
## Adam Charles Voges : 1 3rd Qu.: 4274.5 3rd Qu.: 34.00
## Adam Fraser Milne : 1 Max. :18433.0 Max. :308.00
## (Other) :205
## Runs Wickets Average X5_Wicket_Hauls
## Min. : 12 Min. : 1.00 Min. : 12.00 Min. : 0.000
## 1st Qu.: 336 1st Qu.: 8.00 1st Qu.: 28.05 1st Qu.: 0.000
## Median : 1344 Median : 40.00 Median : 33.44 Median : 1.000
## Mean : 2355 Mean : 76.33 Mean : 38.34 Mean : 1.976
## 3rd Qu.: 3564 3rd Qu.:106.00 3rd Qu.: 38.73 3rd Qu.: 3.000
## Max. :13575 Max. :523.00 Max. :172.00 Max. :15.000
##
## X10_Wicket_Hauls Strike_Rate Economy
## Min. : 0.000 Min. : 9.00 Min. :2.850
## 1st Qu.: 0.000 1st Qu.: 33.30 1st Qu.:4.740
## Median : 0.000 Median : 39.54 Median :5.080
## Mean : 0.872 Mean : 44.33 Mean :5.149
## 3rd Qu.: 1.000 3rd Qu.: 46.85 3rd Qu.:5.540
## Max. :10.000 Max. :156.00 Max. :8.000
##
# Cluster the bowlers with k-means and show the clusters on an interactive
# Wickets-vs-Economy scatter plot.

# Features used for clustering, selected by name rather than by position so
# that a change in the column order of `df` cannot silently pick the wrong
# variables.
BWE <- df %>%
  select(Balls, Wickets, Economy)

# Secondary feature set (maidens, runs conceded, ten-wicket hauls). It is not
# used by the clustering or the plot below.
df2 <- df %>%
  select(Maidens, Runs, X10_Wicket_Hauls)

# k-means measures Euclidean distance, so the features are standardised first.
# On the raw values Balls (up to 18433) would swamp Economy (2.85 to 8) and
# the clusters would in effect be bins of Balls only.
# nstart = 25 keeps the best of 25 random initialisations instead of relying
# on a single start that may end in a poor local optimum.
# The seed is fixed so that the cluster assignment is reproducible.
set.seed(20)
BWECluster <- kmeans(scale(BWE), centers = 5, nstart = 25)

# NOTE: because the input was standardised, BWECluster$centers are expressed
# in standard-deviation units, not in the original measurement units.

# Store the labels as a factor so plotly colours the clusters as discrete
# groups rather than on a continuous colour scale.
BWECluster$cluster <- as.factor(BWECluster$cluster)

plot_ly(
  BWE,
  x = ~Wickets, y = ~Economy,
  type = 'scatter', mode = 'markers',
  color = BWECluster$cluster,
  # Hover labels are taken from `df`, whose rows line up one-to-one with
  # the rows of `BWE`.
  text = ~paste('Name: ', df$Name)
) %>%
  layout(title = "Cluster of wickets & economy")